
import requests
import json
from time import sleep
from tqdm import tqdm
import csv
from bs4 import BeautifulSoup
import re




select = input("请输入要爬的职位:")

# Build the list of search-API URLs once (the original rebuilt this whole
# list on every loop iteration).  Each URL requests one page of results.
urls = [
    'https://fe-api.zhaopin.com/c/i/sou?start={}&pageSize=80&cityId=489&workExperience=-1&education=-1&companyType=-1&employmentType=-1&jobWelfareTag=-1&kw={}&kt=3'.format(str(pg), select)
    for pg in range(60, 6000, 60)
]

# Detail-page regexes (company type / company size).  Compiled once instead
# of being re-parsed for every detail page.
typename_pat = re.compile(r'<li><span class="iconfont icon-promulgator-type"></span><strong>(.*?)</strong></li>')
sizename_pat = re.compile(r'<span class="iconfont icon-promulgator-link"></span><strong>(.*?)</strong>')

# Open the CSV exactly once with a context manager.  The original opened it
# in append mode inside the loop, never closed it (leaked handle), and wrote
# the header row again on every one of the 80 iterations.
with open("c:/Users/hj137/Desktop/" + select + "智联招聘信息.csv", 'a', newline='', encoding='utf-8') as ws:
    writer = csv.writer(ws)
    writer.writerow(['工作名称', '公司名称', '薪资', '工作地点', '详情网址', '工作经验', '福利', '工作要求', '文凭要求', '发布日期', '岗位需求', '公司性质', '公司规模'])

    # The original kept two counters (k, qwe) that were always equal and a
    # redundant `if qwe >= 80: break` — a single range(80) does the same.
    for page in range(80):
        url = urls[page]
        data = json.loads(requests.get(url).text)
        result = data['data']['results']  # list of job postings on this page

        # tqdm now tracks the real work (one detail fetch per posting)
        # instead of a cosmetic sleep loop after the fact.
        for item in tqdm(result):
            link = item['positionURL']  # detail-page URL
            # Fetch the detail page ONCE.  The original downloaded every
            # detail page twice: once for the description text and again
            # for the company type/size regex matches.
            detail_html = requests.get(link).text
            texts = BeautifulSoup(detail_html, 'lxml').find_all('div', class_='pos-ul')
            desc = BeautifulSoup(str(texts).replace('[]', '\n'), 'lxml').get_text()
            writer.writerow([
                item['jobName'],               # 工作名称
                item['company']['name'],       # 公司名称
                item['salary'],                # 薪资
                item['city']['display'],       # 工作地点
                link,                          # 详情网址
                item['workingExp']['name'],    # 工作经验
                item['welfare'],               # 福利
                item['emplType'],              # 工作要求
                item['eduLevel']['name'],      # 文凭要求
                item['createDate'],            # 发布日期
                desc,                          # 岗位需求
                typename_pat.findall(detail_html),   # 公司性质
                sizename_pat.findall(detail_html),   # 公司规模
            ])
            sleep(0.01)  # rate-limit: be polite to the server

print('爬取完成!')



